library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)

RRPLOTS and flchain

odata <- flchain
odata$chapter <- NULL
pander::pander(table(odata$death))
0 1
5705 2169
rownames(odata) <- c(1:nrow(odata))
data <- as.data.frame(model.matrix(Surv(futime,death)~.,odata))

data$`(Intercept)` <- NULL

dataFL <- as.data.frame(cbind(time=odata[rownames(data),"futime"],
                              status=odata[rownames(data),"death"],
                              data))
pander::pander(table(dataFL$status))
0 1
4562 1962
dataFL$time <- dataFL$time/365

Exploring Raw Features with RRPlot

convar <- colnames(dataFL)[lapply(apply(dataFL,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(dataFL[,c("status",convar)],"status")
pander::pander(topvar)
age kappa lambda creatinine
0 0 0 0
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]

topFeature <- RRPlot(cbind(dataFL$status,dataFL[,topFive[1]]),
                  title=topFive[1])

  par(op)

## With Survival Analysis
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
  RRanalysis[[idx]] <- RRPlot(cbind(dataFL$status,dataFL[,topf]),
                  timetoEvent=dataFL$time,
                  atRate=c(0.90,0.80),
                  title=topf)
  idx <- idx + 1
  par(op)
}

names(RRanalysis) <- topFive

Reporting the Metrics

pander::pander(t(RRanalysis[[1]]$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @:0.8 @MAX_BACC @MAX_RR @SPE100
Thr 73.000 69.000 68.000 54.000 50.00000
RR 4.013 4.399 4.448 5.662 1.00000
RR_LCI 3.740 4.045 4.087 4.500 0.00000
RR_UCI 4.305 4.783 4.842 7.125 0.00000
SEN 0.581 0.713 0.721 0.964 1.00000
SPE 0.883 0.790 0.785 0.232 0.00526
BACC 0.732 0.752 0.753 0.598 0.50263
ROCAUC <- NULL
CstatCI <- NULL
RRatios <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL

for (topf in topFive)
{
  CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
  RRatios <- rbind(RRatios,RRanalysis[[topf]]$RR_atP)
  LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
  Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
  Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
  ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive

pander::pander(ROCAUC)
  est lower upper
age 0.822 0.810 0.833
kappa 0.682 0.667 0.697
lambda 0.665 0.650 0.680
creatinine 0.594 0.578 0.610
pander::pander(CstatCI)
  mean.C Index median lower upper
age 0.775 0.775 0.763 0.785
kappa 0.671 0.671 0.659 0.684
lambda 0.657 0.657 0.645 0.670
creatinine 0.589 0.590 0.575 0.603
pander::pander(LogRangp)
age 0.00e+00
kappa 4.90e-175
lambda 4.41e-145
creatinine 2.67e-67
pander::pander(Sensitivity)
  est lower upper
age 0.581 0.558 0.602
kappa 0.319 0.298 0.340
lambda 0.300 0.279 0.321
creatinine 0.288 0.269 0.309
pander::pander(Specificity)
  est lower upper
age 0.883 0.873 0.892
kappa 0.900 0.891 0.908
lambda 0.899 0.890 0.907
creatinine 0.865 0.854 0.875
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
  ROCAUC C-Stat Sen Spe
age 0.822 0.775 0.581 0.883
kappa 0.682 0.671 0.319 0.900
lambda 0.665 0.657 0.300 0.899
creatinine 0.594 0.589 0.288 0.865

Train Test Set

trainsamples <- sample(nrow(dataFL),0.5*nrow(dataFL))
dataFLTrain <- dataFL[trainsamples,]
dataFLTest <- dataFL[-trainsamples,]


pander::pander(table(dataFLTrain$status))
0 1
2308 954
pander::pander(table(dataFLTest$status))
0 1
2254 1008

Cox Modeling

ml <- BSWiMS.model(Surv(time,status)~.,data=dataFLTrain,loops=0)
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower mean upper u.Accuracy r.Accuracy
age 0.0973 0.0722 0.0784 0.0843 0.706 0.599
flc.grp 0.0663 0.0326 0.0593 0.0867 0.599 0.721
lambda 0.1765 0.1081 0.1529 0.2003 0.657 0.715
Table continues below
  full.Accuracy u.AUC r.AUC full.AUC IDI NRI
age 0.721 0.733 0.622 0.747 0.19781 0.8792
flc.grp 0.721 0.622 0.747 0.747 0.00495 0.2889
lambda 0.721 0.618 0.742 0.747 0.00476 0.0406
  z.IDI z.NRI Delta.AUC Frequency
age 27.22 26.23 -0.12456 1
flc.grp 4.11 7.66 0.00000 1
lambda 3.87 1.07 -0.00526 1

Cox Model Performance

The evaluation of the raw Cox model with RRPlot()

timeinterval <- 5

h0 <- sum(dataFLTrain$status & dataFLTrain$time <= timeinterval)
h0 <- h0/sum((dataFLTrain$time > timeinterval) | (dataFLTrain$status==1))

pander::pander(t(c(h0=h0,timeinterval=timeinterval)),caption="Initial Parameters")
Initial Parameters
h0 timeinterval
0.129 5
index <- predict(ml,dataFLTrain)
rdata <- cbind(dataFLTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories

obsexp <- rrAnalysisTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 953 893 1015 1280 1.09e-21
low 257 227 290 348 3.93e-07
90% 155 132 181 180 5.76e-02
80% 542 497 590 750 1.40e-15
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Test results

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

By Risk Categories on test set

obsexp <- rrAnalysisTest$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1006 945 1070 1274 7.27e-15
low 278 246 313 340 5.73e-04
90% 136 114 161 166 1.79e-02
80% 594 547 644 767 9.83e-11
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Test: Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataFLTrain,"status","time")

( 15.12552 , 17249.48 , 1.265159 , 953 , 1045.192 )

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.252 0.694 15.1

The RRplot() of the calibrated model

index <- predict(ml,dataFLTrain)

calrdata <- cbind(dataFLTrain$status,ppoisGzero(index,calprob$h0))


rrAnalysisCalTrain <- RRPlot(calrdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTrain$time,
                     title="Cal. Train: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories

obsexp <- rrAnalysisCalTrain$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 953 893 1015 828 2.22e-05
low 257 227 290 225 3.58e-02
90% 155 132 181 117 7.16e-04
80% 542 497 590 485 1.10e-02
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Cal. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)

Checking the test set

index <- predict(ml,dataFLTest)
rtestdata <- cbind(dataFLTest$status,ppoisGzero(index,calprob$h0))

rrAnalysisCalTest <- RRPlot(rtestdata,atRate=c(0.90,0.80),
                     timetoEvent=dataFLTest$time,
                     title="Cal. Test: Breast Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=calprob$timeInterval)

By Risk Categories test set

obsexp <- rrAnalysisCalTest$OERatio$atThrEstimates
pander::pander(obsexp)
  Observed L.CI H.CI Expected pvalue
Total 1006 945 1070 825 9.58e-10
low 278 246 313 220 1.82e-04
90% 136 114 161 108 7.95e-03
80% 594 547 644 496 1.97e-05
maxx <- 1.1*max(c(obsexp$Observed,obsexp$Expected))
minx <- 0.9*min(c(obsexp$Observed,obsexp$Expected))

plot(obsexp$Expected,obsexp$Observed,
     xlim=c(minx,maxx),
     ylim=c(minx,maxx),
     main="Test Set. Expected vs Observed",
     ylab="Observed",
     xlab="Expected",
     col=rainbow(nrow(obsexp)),
     cex=1.5,
     log="xy")

errbar(obsexp$Expected,obsexp$Observed,obsexp$L.CI,obsexp$H.CI,add=TRUE,pch=0,errbar.col=rainbow(nrow(obsexp)),cex=0.75)
lines(x=c(1,maxx),y=c(1,maxx),lty=2)
text(obsexp$Expected,obsexp$Observed,rownames(obsexp),pos=2,cex=0.75)